# encoding: utf-8
"""
@author: 夏洛
@QQ: 1972386194
@file: 01-demo.py
"""

from scrapy import Selector

# Minimal example: wrap an HTML string in a Selector and read the <title> text.
body = '<html><head><title>Hello World</title></head><body></body></html>'
selector = Selector(text=body)
title_nodes = selector.xpath('//title/text()')
title = title_nodes.extract_first()

# extract()        returns all matched data as a list
# extract_first()  returns only the first match, as a string
# Sample document for the selector examples that follow: a page whose
# <div id='images'> holds five <a> links, each wrapping a text label,
# a <br /> and an <img> thumbnail.
html = '''
<html>
 <head>
  <base href='http://example.com/' />
  <title>Example website</title>
 </head>
 <body>
  <div id='images'>
   <a href='image1.html'>Name: My image 1 <br /><img src='image1_thumb.jpg' /></a>
   <a href='image2.html'>Name: My image 2 <br /><img src='image2_thumb.jpg' /></a>
   <a href='image3.html'>Name: My image 3 <br /><img src='image3_thumb.jpg' /></a>
   <a href='image4.html'>Name: My image 4 <br /><img src='image4_thumb.jpg' /></a>
   <a href='image5.html'>Name: My image 5 <br /><img src='image5_thumb.jpg' /></a>
  </div>
 </body>
</html>
'''

# Parse the sample document once; every example below queries this selector.
response = Selector(text=html)

# XPath: select every <a> element in the document.
result = response.xpath('//a')
print(result)
# A relative XPath can be applied to the previous result, e.g.:
# print(result.xpath('./img').extract())

# CSS: the same <a> elements, selected with a CSS selector instead.
print(response.css('a'))

# Attribute selector: only the <a> whose href is "image1.html".
print(response.css('a[href="image1.html"]').extract())
# Find the <img> nodes inside that <a> node; extract() returns a list.
print(response.css('a[href="image1.html"] img').extract())

# Extract the first matched node.
print(response.css('a[href="image1.html"] img').extract_first())

# Extract the text value of the node (::text pseudo-element).
print(response.css('a[href="image1.html"]::text').extract_first())
# Extract an attribute value (::attr(name) pseudo-element).
print(response.css('a[href="image1.html"] img::attr(src)').extract_first())

# Regex extraction over the text nodes of every <a>.
# The patterns are raw strings: in a normal string literal `\s` is an invalid
# escape sequence, which Python reports as a DeprecationWarning/SyntaxWarning.
# The pattern value itself is unchanged.
data = response.xpath('//a/text()').re(r'Name:\s(.*)')
print(data)

# With two capture groups, the groups of every match come back in one flat list.
print(response.xpath('//a/text()').re(r'(.*?):\s(.*)'))
